{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "os.environ['CUDA_VISIBLE_DEVICES'] = ''\n",
    "os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sacrebleu.metrics import BLEU, CHRF, TER\n",
    "\n",
    "bleu = BLEU()\n",
    "chrf = CHRF(word_order = 2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/ubuntu/malaya/malaya/tokenizer.py:202: FutureWarning: Possible nested set at position 3361\n",
      "  self.tok = re.compile(r'({})'.format('|'.join(pipeline)))\n",
      "/home/ubuntu/malaya/malaya/tokenizer.py:202: FutureWarning: Possible nested set at position 3879\n",
      "  self.tok = re.compile(r'({})'.format('|'.join(pipeline)))\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d0886d1190b44852bfa5cdd48f0bf042",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading:   0%|          | 0.00/233M [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d87a7d321622451885c45a1407592843",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading:   0%|          | 0.00/234k [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2022-10-04 11:41:59.749385: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA\n",
      "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
      "2022-10-04 11:41:59.765725: E tensorflow/stream_executor/cuda/cuda_driver.cc:271] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected\n",
      "2022-10-04 11:41:59.765741: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:169] retrieving CUDA diagnostic information for host: huseincomel-desktop\n",
      "2022-10-04 11:41:59.765746: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:176] hostname: huseincomel-desktop\n",
      "2022-10-04 11:41:59.765800: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:200] libcuda reported version is: Not found: was unable to find libcuda.so DSO loaded into this program\n",
      "2022-10-04 11:41:59.765825: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:204] kernel reported version is: 470.141.3\n"
     ]
    }
   ],
   "source": [
    "import malaya\n",
    "\n",
    "model = malaya.translation.en_ms.transformer(model = 'noisy-base')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "from unidecode import unidecode\n",
    "\n",
    "with open('eng_Latn.dev') as fopen:\n",
    "    eng = fopen.read().split('\\n')\n",
    "    \n",
    "with open('zsm_Latn.dev') as fopen:\n",
    "    ms = fopen.read().split('\\n')\n",
    "    \n",
    "right = [unidecode(s) for s in ms]\n",
    "left = [unidecode(s) for s in eng]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|█████████████████████████████████████████████████████████████████████████████████████████| 499/499 [02:40<00:00,  3.11it/s]\n"
     ]
    }
   ],
   "source": [
    "from tqdm import tqdm\n",
    "\n",
    "batch_size = 2\n",
    "\n",
    "results = []\n",
    "for i in tqdm(range(0, len(left), batch_size)):\n",
    "    r_greedy = model.greedy_decoder(left[i:i + batch_size])\n",
    "    results.extend(r_greedy)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "filtered_left, filtered_right = [], []\n",
    "for no, r in enumerate(results):\n",
    "    if len(r):\n",
    "        filtered_left.append(r)\n",
    "        filtered_right.append(right[no])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "refs = [filtered_right]\n",
    "sys = filtered_left"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'name': 'BLEU',\n",
       " 'score': 41.949315647096626,\n",
       " '_mean': -1.0,\n",
       " '_ci': -1.0,\n",
       " '_verbose': '73.4/50.3/35.7/25.7 (BP = 0.977 ratio = 0.977 hyp_len = 21526 ref_len = 22027)',\n",
       " 'bp': 0.9769945746149022,\n",
       " 'counts': [15810, 10330, 6977, 4772],\n",
       " 'totals': [21526, 20529, 19532, 18535],\n",
       " 'sys_len': 21526,\n",
       " 'ref_len': 22027,\n",
       " 'precisions': [73.44606522345072,\n",
       "  50.31906084076185,\n",
       "  35.720868318656564,\n",
       "  25.745886161316427],\n",
       " 'prec_str': '73.4/50.3/35.7/25.7',\n",
       " 'ratio': 0.9772551868161802}"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "r = bleu.corpus_score(sys, refs)\n",
    "r.__dict__"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "chrF2++ = 66.20"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "chrf.corpus_score(sys, refs)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
